*Set the working directory to the replication folder.
*NOTE(review): this is an absolute, machine-specific path. Anyone else running the
*file has to edit it (or start Stata inside the replication folder) first.
cd "C:\Users\ollfo959\Dropbox\Sexual assault\Labor market inequality and sexual harassment\Final submission\Replication files\"

*Open Data
*Loads survey_experiment from the working directory; clear discards data in memory.
use survey_experiment, clear

*Collapse education into the 7 categories used in the main analysis.
*Original codes 5 and 6 are folded into 4, and codes 7, 8, 9 are shifted down to
*5, 6, 7. Codes not listed are left untouched. All rules are applied to the
*original value, so a recoded observation is never recoded a second time.
recode edu (5 6 = 4) (7 = 5) (8 = 6) (9 = 7)

*Add data on sex composition in 2-digit occupation (obtained from administrative data).
*The join key is ssyk2; unmatched(master) keeps respondents that have no match in
*ssyk2_shw_2015 instead of dropping them.
*NOTE(review): later code reads ssyk2_wsh, presumably the share of women supplied
*by this file - confirm against the using data.
joinby ssyk2 using ssyk2_shw_2015, unmatched(master)

*Create separate choice variables for male (m) and female (k) victims in the vignettes.
*Every choice set is backed by two consecutive survey items (45/46, 50/51, 55/56 for m;
*60/61, 65/66, 70/71 for k). The _a variable is 1 when either item equals 1, the _b
*variable is 1 when either item equals 2; both are missing when neither item was answered.
local base_m 45 50 55
local base_k 60 65 70
foreach victim in m k {
	local vignette = 0
	foreach q1 of local base_`victim' {
		local ++vignette
		local q2 = `q1' + 1
		local answered "q`q1'!=. | q`q2'!=."
		generate pref_`victim'_`vignette'_a = (q`q1'==1 | q`q2'==1) if `answered'
		generate pref_`victim'_`vignette'_b = (q`q1'==2 | q`q2'==2) if `answered'
	}
}

*Respondent identifier: the row number in the wide data
generate id = _n

*Retain only the variables needed for the analysis
keep q80* ///
	lon_m_* arbetsupg_m_* inflytande_m_* arbetsmilj_m_* ///
	lon_k_* arbetsupg_k_* inflytande_k_* arbetsmilj_k_* ///
	pref_k_* pref_m_* ///
	id sex q74 q40 q38 q76 q78  pinc edu age6 ssyk*

*Go from one row per respondent to one row per job alternative.
*The part of each variable name after the stub becomes the string variable alt,
*and the reshaped variables keep the stub name including its trailing underscore.
reshape long ///
	lon_k_@ arbetsupg_k_@ inflytande_k_@ arbetsmilj_k_@ ///
	lon_m_@ arbetsupg_m_@ inflytande_m_@ arbetsmilj_m_@ ///
	pref_k_@ pref_m_@, ///
	i(sex id) j(alt) string

*Workplace sex composition with a fallback: when q38 equals 4 (respondent has no
*colleagues in the current workplace) take the answer about typical workplaces in
*the occupation (q40) instead; otherwise keep q38 as it is.
generate q38_alt = cond(q38 == 4, q40, q38)


*Generate specific dummies for each job trait, separate for each victim sex
*m is male; k is female
*Every dummy is defined only on rows where the choice variable for that victim sex
*is non-missing. Variables are referenced by their full post-reshape names (with the
*trailing underscore) so the loop does not depend on variable abbreviation.
foreach gen in m k{
	*Skill development
	gen ej_ut_`gen'=arbetsupg_`gen'_=="Low skill development"  if pref_`gen'_!=.
	gen mkt_ut_`gen'=arbetsupg_`gen'_=="High skill development"  if pref_`gen'_!=.

	*Influence over the schedule.
	*Fix: the text variable now follows the loop's victim sex. It used to be
	*hard-coded to the m version, so the k dummies were built from the m text.
	gen ej_inf_`gen'= inflytande_`gen'_=="No own influence"  if pref_`gen'_!=.
	gen fri_sch_`gen'= inflytande_`gen'_=="Entirely flexible schedule" if pref_`gen'_!=.

	*Wage offer relative to the current wage
	gen low_5_`gen'= lon_`gen'_=="5% less than your current wage"  if pref_`gen'_!=.
	*NOTE(review): this label has no percent sign, unlike the other two wage labels.
	*Left unchanged because it must match the text stored in the data - confirm.
	gen high_5_`gen'= lon_`gen'_=="5 more than your current wage"  if pref_`gen'_!=.
	gen high_10_`gen'= lon_`gen'_=="10% more than your current wage"  if pref_`gen'_!=.

	*Work environment: tab with gen() creates one dummy per distinct text value,
	*numbered in sorted order, so the indices below are tied to the exact category
	*strings in the data.
	*NOTE(review): confirm that categories 1, 4 and 6 are the happy, no-information
	*and conflict descriptions and that 2, 3 and 5 are the harassment vignettes.
	tab arbetsmilj_`gen'_, gen(dum_)

	gen happy_`gen' = dum_1==1  if pref_`gen'_!=.
	gen conf_`gen' = dum_6==1  if pref_`gen'_!=.
	gen sekuhara_`gen'= dum_2==1 | dum_3==1 | dum_5==1 if pref_`gen'_!=.
	gen noinfo_`gen'= dum_4==1  if pref_`gen'_!=.
	*Remove the temporary dummies so the next pass can reuse the dum_ prefix
	drop dum_*
}

*One indicator per harassment vignette, separately by victim sex (m is male; k is female).
*The dummies produced by tabulate are numbered in the sorted order of the text
*categories of the work-environment variable; categories 2, 3 and 5 feed the
*grope, sexual and sexist indicators. Each indicator is missing on rows where the
*choice variable for that victim sex is missing.
foreach victim in m k {
	tabulate arbetsmilj_`victim'_, generate(dum_)
	foreach spec in "grope 2" "sexual 3" "sexist 5" {
		local newvar : word 1 of `spec'
		local level : word 2 of `spec'
		generate `newvar'_`victim' = (dum_`level' == 1) if pref_`victim' != .
	}
	*The temporary dummies are removed before the next pass
	drop dum_*
}

*Pool the victim-sex specific variables into joint (_b) versions.
*pref_b adds the two choice variables, counting a missing one as zero.
egen pref_b = rowtotal(pref_k pref_m)

*For every trait take the m version when the m choice variable is non-missing,
*otherwise the k version when the k choice variable is non-missing, otherwise missing.
local joint_traits ej_ut mkt_ut ej_inf fri_sch low_5 high_5 high_10 happy ///
	conf sekuhara noinfo grope sexual sexist
foreach trait of local joint_traits {
	generate `trait'_b = cond(pref_m != ., `trait'_m, cond(pref_k != ., `trait'_k, .))
}

*Preparation for flagging choices that contain a dominant alternative.
*alt_id is the first character of alt and identifies the choice a job belongs to.
generate alt_id = substr(alt, 1, 1)

*Ordinal score per trait: a higher value means a better job, and 2 is the default
*when none of the listed dummies equals 1.
*Skill development: 1 = low, 3 = high
generate dev_lev = cond(mkt_ut_b == 1, 3, cond(ej_ut_b == 1, 1, 2))
*Schedule: 1 = no own influence, 3 = entirely flexible
generate flex_lev = cond(fri_sch_b == 1, 3, cond(ej_inf_b == 1, 1, 2))
*Wage: 1 = 5 less, 3 = 5 more, 4 = 10 more
generate wage_lev = cond(high_10_b == 1, 4, cond(high_5_b == 1, 3, cond(low_5_b == 1, 1, 2)))
*Work environment: 1 = conflict or harassment, 3 = happy
generate arb_lev = cond(conf_b == 1 | sekuhara_b == 1, 1, cond(happy_b == 1, 3, 2))
  
*Within each respondent and choice, find the highest score on every trait and flag
*the jobs that reach it, i.e. the jobs that are best (or tied for best) on that trait.
local traits dev flex wage arb
foreach t of local traits {
	by id alt_id, sort: egen `t'_max = max(`t'_lev)
	generate `t'_maxd = (`t'_lev == `t'_max)
}

*Flag jobs whose score equals the within-choice mean of the trait, which is how a
*tie between the jobs in a choice is detected.
foreach t of local traits {
	by id alt_id, sort: egen `t'_mean = mean(`t'_lev)
	generate `t'_equal = (`t'_lev == `t'_mean)
}


*Count the traits on which a job is best or tied for best, and take the highest
*count among the jobs in the same choice.
egen sum_maxsum = rowtotal(dev_maxd flex_maxd wage_maxd arb_maxd)
by id alt_id, sort: egen best_alt = max(sum_maxsum)

*A choice has a dominant alternative when one job is better or equal on all 4 traits.
*dominant marks every job in such a choice; dom_op marks the dominant job itself.
generate dominant = (best_alt == 4)
generate dom_op = (sum_maxsum == 4)

*A response is "wrong" when the dominant job was not picked. Jobs that tie on all
*4 traits (sum_equal of 4) are left out. Respondents with at least one wrong
*response are the inattentive respondents.
generate best_option = (sum_maxsum == 4)
egen sum_equal = rowtotal(dev_equal flex_equal wage_equal arb_equal)
generate wrong = (best_option == 1 & pref_b == 0) if dominant == 1 & sum_equal != 4
by id, sort: egen inattent = max(wrong)

*Indicator variables for the subsamples used in the analysis
generate all = 1
*Respondent sex: code 1 gives wom, code 2 gives man; both are 0 when sex is missing
generate wom = (sex == 1)
generate man = (sex == 2)
*vic: men with a non-missing k choice variable or women with a non-missing m one.
*by: the same two groups but with that choice variable missing.
generate vic = (man == 1 & pref_k != .) | (wom == 1 & pref_m != .)
generate by = (man == 1 & pref_k == .) | (wom == 1 & pref_m == .)

*Splits on q78, q76 and q74, each defined only where the listed conditions hold
generate high_risk = (q78 > 1) if q78 != .
generate low_risk = (q78 == 1) if q78 != .
generate aware = inlist(q76, 2, 3) if q74 == 1 | q76 < 4
generate not_aw = (q74 == 1 | q76 == 1) if q74 == 1 | q76 < 4

*Sex-minority occupation: share of women above one half for non-women, below one
*half for women.
*NOTE(review): a share of exactly .5 gives 0 for non-women but stays missing for
*women - confirm that this asymmetry is intended.
generate min_occ = (ssyk2_wsh > .5) if ssyk2_wsh != . & wom == 0
replace min_occ = (ssyk2_wsh < .5) if wom == 1 & ssyk2_wsh != . & ssyk2_wsh != .5
generate maj_occ = 1 - min_occ
generate ssyk2_msh = 1 - ssyk2_wsh

*Workplace composition dummies from q38_alt (codes 1, 3 and 2)
foreach spec in "wd 1" "mix 3" "md 2" {
	local newvar : word 1 of `spec'
	local code : word 2 of `spec'
	generate `newvar' = (q38_alt == `code')
}

*One identifier per respondent-choice combination
egen con_id = group(alt_id id)

*Add weight data, drop if weights cannot be calculated
*Fix: the first condition used to test wom for missing, but wom is built as a 0/1
*comparison of sex and is never missing, so respondents with missing sex were kept
*and matched to a weight cell as if wom were 0. The test is now on sex itself.
drop if sex==. | age6 ==. | edu ==. | pinc==. 
*NOTE(review): absolute, machine-specific path to the weight file; it has to be
*edited before the file can be run on another machine.
*unmatched(master) keeps rows without a matching weight cell; the match status is
*stored in _mergew.
joinby  wom age6 edu pinc using "C:\Users\ollfo959\Documents\medborgarpanelen\weights_med_clean.dta", unmatched(master) _merge(_mergew)
*The weighting sample is the rows from choices without a dominant alternative
gen samp=dominant==0
*total() is the current name of the egen function formerly called sum()
egen samp_tot=total(samp)
bysort wom alder5 edu2: egen gen_edu2_alder5_medtot =total(samp)
*Weight = (cell total / overall total in the weight data) divided by the same
*ratio in this sample.
*NOTE(review): alder5, edu2, gen_edu2_alder5_tot and syss_tot are not created in
*this file and presumably come from the weight file - confirm.
gen gen_edu2_alder5_w=(gen_edu2_alder5_tot/syss_tot)/(gen_edu2_alder5_medtot/samp_tot)

*Tag exactly one row per respondent
egen tag_id = tag(id)
*Aware respondents within the full sample
generate aw_all = (aware == 1 & all == 1)
*Split on q74 at 3 and above versus 2 and below, missing where q74 is missing
generate sec_hara = (q74 >= 3) if q74 != .
generate sec_nohara = (q74 <= 2) if q74 != .
*Number of harassment jobs within each respondent-choice combination
by con_id, sort: egen seku_sum = sum(sekuhara_b)

*Calculate the stated relative importance given to each trait.
*mean_imp is the row mean over the variables from q80_1 through q80_4; a trait
*counts as important when its own rating is above that mean.
egen mean_imp = rowmean(q80_1-q80_4)
generate sh_imp = (q80_1 > mean_imp) if q80_1 != .
generate sh_noimp = (q80_1 < mean_imp) if q80_1 != .
foreach spec in "flex 2" "dev 3" "wage 4" {
	local trait : word 1 of `spec'
	local item : word 2 of `spec'
	generate `trait'_imp = (q80_`item' > mean_imp) if q80_`item' != .
}

*Absolute versions for q80_1: a rating of 5 or more versus below 5
generate sh_vimp = (q80_1 >= 5) if q80_1 != .
generate sh_notvimp = (q80_1 < 5) if q80_1 != .

 *Interact every joint trait dummy with the vic and by indicators.
*All _vic variables are created first and all _by variables second.
local inter_traits ej_ut mkt_ut ej_inf fri_sch low_5 high_5 high_10 happy conf ///
	sekuhara noinfo grope sexual sexist
foreach role in vic by {
	foreach trait of local inter_traits {
		generate `trait'_`role' = `trait'_b * `role'
	}
}
